#### LIBRARIES ####
library(tidyverse)
library(randomizr)

#### IMPORT DATASET ####

df <- read.csv("Data/exp.csv")

#### ANALYSIS ####

# Calculating the difference in means
d1 <- mean(df$y[df$treatment=="E"],na.rm=T)-
  mean(df$y[df$treatment=="B"],na.rm=T)


# Randomization inference
set.seed(268329051)
sims <- 10000
s1 <- rep(NA,sims)
for(i in 1:sims){
  df$d <- simple_ra(N=length(df$treatment),
                    prob_each=c(1/2,1/2))
  s1[i] <- mean(df$y[df$d==1],na.rm=T)-
    mean(df$y[df$d==0],na.rm=T) 
}

d1
mean(d1<s1)

res <- data.frame(s1)
ggplot(res,aes(x=s1))+
  geom_histogram(fill="white",color="black")+
  geom_vline(xintercept = d1,color="red")+
  theme_bw()

#### FIGURE 7 ####
Condition <- c("E","B")
simdim <- data.frame(Condition)
simdim$mean <- NA
simdim$q975 <- NA
simdim$q025 <- NA


set.seed(12589)
sims <- 10000
for(i in Condition){
  temp <- rep(NA,sims)
  for(j in 1:sims){
    temp[j] <- mean(sample(df$y[df$treatment==i],
                           replace=T,size=sum(df$treatment==i)))
  }
  simdim$mean[simdim$Condition==i] <- mean(df$y[df$treatment==i],na.rm=T)
  simdim$q975[simdim$Condition==i] <- quantile(temp,.975)
  simdim$q025[simdim$Condition==i] <- quantile(temp,.025)
}


ggplot(simdim,aes(y=Condition,x=mean))+
  geom_point(data=simdim,aes(x=mean,y=Condition))+
  geom_errorbarh(data=simdim,aes(xmax=q975,xmin=q025),height=0)+
  xlab("Opt-in Rate")+
  scale_y_discrete(labels=c("Baseline",
                            "Expressive"))+
  scale_x_continuous(breaks=c(.25,.5,.75))+
  theme_bw()

ggsave(file="Figures/Fig7.pdf",height=2,width=4)

#### Additional analysis ####

# Removing respondents from the baseline group - we are 
# interested in comparing those who do/don't opt in for
# expressive benefits
pt <- df[df$treatment!="B",]

summary(lm(vb.express ~ y,data=pt))
summary(lm(vb.ir ~ y,data=pt))
summary(lm(vb.cduty ~ y,data=pt))
summary(lm(vb.pduty ~ y,data=pt))
summary(lm(vb.group ~ y,data=pt))
summary(lm(vb.kant ~ y,data=pt))
summary(lm(vb.pressure ~ y,data=pt))

## Check for balance ##
summary(lm(vb.express ~ treatmentE,data=df))
summary(lm(vb.ir ~ treatmentE,data=df))
summary(lm(vb.cduty ~ treatmentE,data=df))
summary(lm(vb.pduty ~ treatmentE,data=df))
summary(lm(vb.group ~ treatmentE,data=df))
summary(lm(vb.kant ~ treatmentE,data=df))
summary(lm(vb.pressure ~ treatmentE,data=df))

#### Voting frequency ####

## Comparing self-reported voting among those who do/don't opt in to long survey ##
#expressive group
summary(lm(vfreqb ~ y,data=df[df$treatmentE==1,]))
# baseline group
summary(lm(vfreqb ~ y,data=df[df$treatmentE==0,]))



#### Voting Frequency: expressive vs non-expressive respondents ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vfreqb)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT
# difference in means:
result

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

#p-value:
1-mean(results>0)


#### FIGURE 8a ####

pt <- df[df$treatmentE==1,]

reasons <- (c("Civic Duty","Pivotality","Group\n Pivotality","Expressive\n Voting",
              "Kantian","Partisan\n Duty","Social\n Pressure"))
vr <- data.frame(reasons)
vr$mean2 <- NA


pt2 <- pt

vr$mean2[1] <- mean(pt2$vb.cduty,na.rm=T)
vr$mean2[2] <- mean(pt2$vb.ir,na.rm=T)
vr$mean2[3] <- mean(pt2$vb.group,na.rm=T)
vr$mean2[4] <- mean(pt2$vb.express,na.rm=T)
vr$mean2[5] <- mean(pt2$vb.kant,na.rm=T)
vr$mean2[6] <- mean(pt2$vb.pduty,na.rm=T)
vr$mean2[7] <- mean(pt2$vb.pressure,na.rm=T)

vr$reasons2 <- as.factor(vr$reasons)
vr$reasons2 <-  factor(vr$reasons2,levels(vr$reasons2)[c(7,4,3,6,1,5,2)])

ggplot(vr,aes(x=mean2,y=reasons2))+
  geom_vline(xintercept = 0,color="grey50")+
  geom_barh(stat="identity",width=.5,fill="gray50",color="gray10")+
  ylab("Reasons for Voting")+
  xlab("Rate of Agreement\n ")+
  theme_bw()
ggsave("Figures/Fig8-1.pdf",height=5,width=4)


#### FIGURE 8b ####

reasons <- rep(c("Civic Duty","Pivotality","Group\nPivotality","Expressive\nVoting",
                 "Kantian","Partisan\nDuty","Social\nPressure"),3)
vr <- data.frame(reasons)
vr$mean <- NA
vr$min <- NA
vr$max <- NA

#### 1 Partisan Duty ####
df <- df%>%
  select(-d)
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.pduty)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Partisan\nDuty"] <- lb
vr$max[vr$reasons=="Partisan\nDuty"] <- ub
vr$mean[vr$reasons=="Partisan\nDuty"] <- result


#### 2 Expressive ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.express)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Expressive\nVoting"] <- lb
vr$max[vr$reasons=="Expressive\nVoting"] <- ub
vr$mean[vr$reasons=="Expressive\nVoting"] <- result

#### 3 Pressure ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.pressure)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Social\nPressure"] <- lb
vr$max[vr$reasons=="Social\nPressure"] <- ub
vr$mean[vr$reasons=="Social\nPressure"] <- result


#### 4 Kant ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.kant)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Kantian"] <- lb
vr$max[vr$reasons=="Kantian"] <- ub
vr$mean[vr$reasons=="Kantian"] <- result


#### 5 Group ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.group)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Group\nPivotality"] <- lb
vr$max[vr$reasons=="Group\nPivotality"] <- ub
vr$mean[vr$reasons=="Group\nPivotality"] <- result

#### 6 Pivotality ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.ir)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Pivotality"] <- lb
vr$max[vr$reasons=="Pivotality"] <- ub
vr$mean[vr$reasons=="Pivotality"] <- result

#### 7 Civic Duty ####
dftest <- df%>%
  rename(d=y,z=treatmentE,x=vb.cduty)%>%
  filter(!is.na(x))%>%
  select(d,z,x)

NT <- mean(dftest$x[dftest$z==1&dftest$d==0])
AT <- mean(dftest$x[dftest$z==0&dftest$d==1])
piNT <- mean(dftest$d[dftest$z==1]==0)
piAT <- mean(dftest$d[dftest$z==0]==1)
piC <- 1-piNT-piAT
C1 <- ((piNT+piC)*mean(dftest$x[dftest$z==0&dftest$d==0])-NT*piNT)/piC
C2 <- ((piAT+piC)*mean(dftest$x[dftest$z==1&dftest$d==1])-AT*piAT)/piC
C <- C1*mean(dftest$z)+C2*(1-mean(dftest$z))
result <- C-NT
result

sims <- 10000
results <- rep(NA,sims)
set.seed(231093)
for(i in 1:sims){
  dfbt <- sample_n(dftest,length(dftest$x),replace=T)
  
  NT <- mean(dfbt$x[dfbt$z==1&dfbt$d==0])
  AT <- mean(dfbt$x[dfbt$z==0&dfbt$d==1])
  piNT <- mean(dfbt$d[dfbt$z==1]==0)
  piAT <- mean(dfbt$d[dfbt$z==0]==1)
  piC <- 1-piNT-piAT
  C1 <- ((piNT+piC)*mean(dfbt$x[dfbt$z==0&dfbt$d==0])-NT*piNT)/piC
  C2 <- ((piAT+piC)*mean(dfbt$x[dfbt$z==1&dfbt$d==1])-AT*piAT)/piC
  C <- C1*mean(dfbt$z)+C2*(1-mean(dfbt$z))
  results[i] <- C-NT
}

resdf <- data.frame(results)
lb <- quantile(results,0.025)
ub <- quantile(results,0.975)

vr$min[vr$reasons=="Civic Duty"] <- lb
vr$max[vr$reasons=="Civic Duty"] <- ub
vr$mean[vr$reasons=="Civic Duty"] <- result


#### 8 FIGURE 8b ####

vr$reasons2 <- as.factor(vr$reasons)
print(levels(vr$reasons2))  ## This will show the levels of x are "Levels: a b c d e"

vr$reasons2 <-  factor(vr$reasons2,levels(vr$reasons2)[c(7,4,3,6,1,5,2)])

library(plyr)
vr$reasons2 <- revalue(vr$reasons2, c("Kantian"="Kantian\nEthic"))
detach("package:plyr", unload = TRUE)

ggplot(vr,aes(x=mean,y=reasons2,color=type))+
  geom_vline(xintercept = 0,color="grey50")+
  geom_point(color="skyblue3",size=2)+
  geom_errorbarh(aes(xmax=max,xmin=min),height=0,
                color="skyblue3")+
  ylab("Reasons for Voting")+
  xlab("Difference in Means\n(Expressive vs. Non-Expressive Respondents)")+
  theme_bw()
ggsave(file="Figures/Fig8-2.pdf",height=5,width=4)

vr1 <- vr

#### FIGURE A.2 ####
reasons <- (c("Civic Duty","Pivotality","Group\n Pivotality","Expressive\n Voting",
              "Kantian","Partisan\n Duty","Social\n Pressure"))
vr <- data.frame(reasons)
vr$mean <- NA
vr$SE <- NA

pt <- df[df$treatmentE==1,]

vr$mean[1] <- lm(vb.cduty ~ y,data=pt)$coefficients[2]
vr$mean[2] <- lm(vb.ir ~ y,data=pt)$coefficients[2]
vr$mean[3] <- lm(vb.group ~ y,data=pt)$coefficients[2]
vr$mean[4] <- lm(vb.express ~ y,data=pt)$coefficients[2]
vr$mean[5] <- lm(vb.kant ~ y,data=pt)$coefficients[2]
vr$mean[6] <- lm(vb.pduty ~ y,data=pt)$coefficients[2]
vr$mean[7] <- lm(vb.pressure ~ y,data=pt)$coefficients[2]

vr$SE[1] <- coef(summary(lm(vb.cduty ~ y,data=pt)))[2,2]
vr$SE[2] <- coef(summary(lm(vb.ir ~ y,data=pt)))[2,2]
vr$SE[3] <- coef(summary(lm(vb.group ~ y,data=pt)))[2,2]
vr$SE[4] <- coef(summary(lm(vb.express ~ y,data=pt)))[2,2]
vr$SE[5] <- coef(summary(lm(vb.kant ~ y,data=pt)))[2,2]
vr$SE[6] <- coef(summary(lm(vb.pduty ~ y,data=pt)))[2,2]
vr$SE[7] <- coef(summary(lm(vb.pressure ~ y,data=pt)))[2,2]


ggplot(vr,aes(x=mean,y=reasons))+
  geom_vline(xintercept = 0,color="grey50")+
  geom_errorbarh(aes(xmax=mean+1.96*SE,xmin=mean-1.96*SE),height=0,color="skyblue3")+
  geom_point(color="skyblue3",size=2)+
  ylab("Reasons for Voting")+
  xlab("Difference in Means\n(Long vs. short survey-takers)")+
  theme_bw()
ggsave("Figures/FigA2.pdf",height=5,width=4)






